In [ ]:
import gzip
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [ ]:
redfin_df = pd.read_csv(r"C:\Users\wilke\Downloads\redfin_metro_market_tracker.tsv000.gz",
                        compression='gzip',sep = '\t',header = 0,quotechar='"')
In [ ]:
oh_bool = []

for i in redfin_df['parent_metro_region']:
    bool = 'OH' in i
    oh_bool.append(bool)

oh_metrics = redfin_df[oh_bool]    
In [ ]:
oh_metrics.sort_values(['parent_metro_region','period_begin'])
Out[ ]:
period_begin period_end period_duration region_type region_type_id table_id is_seasonally_adjusted region city state ... sold_above_list_yoy price_drops price_drops_mom price_drops_yoy off_market_in_two_weeks off_market_in_two_weeks_mom off_market_in_two_weeks_yoy parent_metro_region parent_metro_region_metro_code last_updated
13072 2012-01-01 2012-01-31 30 metro -2 10420 f Akron, OH metro area NaN NaN ... -0.031343 NaN NaN NaN 0.032086 -0.017707 0.021275 Akron, OH 10420 2023-06-12 18:59:53
155928 2012-01-01 2012-01-31 30 metro -2 10420 f Akron, OH metro area NaN NaN ... -0.125000 NaN NaN NaN 0.000000 0.000000 0.000000 Akron, OH 10420 2023-06-12 18:59:53
189842 2012-01-01 2012-01-31 30 metro -2 10420 f Akron, OH metro area NaN NaN ... 0.070000 NaN NaN NaN 0.000000 -0.071429 0.000000 Akron, OH 10420 2023-06-12 18:59:53
246828 2012-01-01 2012-01-31 30 metro -2 10420 f Akron, OH metro area NaN NaN ... NaN NaN NaN NaN NaN NaN NaN Akron, OH 10420 2023-06-12 18:59:53
330225 2012-01-01 2012-01-31 30 metro -2 10420 f Akron, OH metro area NaN NaN ... -0.035880 NaN NaN NaN 0.036364 -0.014095 0.024388 Akron, OH 10420 2023-06-12 18:59:53
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
213821 2023-04-01 2023-04-30 30 metro -2 49780 f Zanesville, OH metro area NaN NaN ... -0.189678 0.223881 0.018752 0.118617 0.347222 0.019353 0.049925 Zanesville, OH 49780 2023-06-12 18:59:53
78549 2023-05-01 2023-05-31 30 metro -2 49780 f Zanesville, OH metro area NaN NaN ... -0.086996 0.246753 0.039857 0.045570 0.400000 0.062162 0.034146 Zanesville, OH 49780 2023-06-12 18:59:53
154272 2023-05-01 2023-05-31 30 metro -2 49780 f Zanesville, OH metro area NaN NaN ... 0.000000 0.200000 NaN NaN 0.666667 0.666667 0.666667 Zanesville, OH 49780 2023-06-12 18:59:53
312699 2023-05-01 2023-05-31 30 metro -2 49780 f Zanesville, OH metro area NaN NaN ... -0.074074 0.244755 0.020875 0.032255 0.381579 0.034357 0.027149 Zanesville, OH 49780 2023-06-12 18:59:53
317645 2023-05-01 2023-05-31 30 metro -2 49780 f Zanesville, OH metro area NaN NaN ... -1.000000 0.500000 NaN NaN 1.000000 NaN 0.000000 Zanesville, OH 49780 2023-06-12 18:59:53

18561 rows × 58 columns

In [ ]:
oh_metrics.columns
Out[ ]:
Index(['period_begin', 'period_end', 'period_duration', 'region_type',
       'region_type_id', 'table_id', 'is_seasonally_adjusted', 'region',
       'city', 'state', 'state_code', 'property_type', 'property_type_id',
       'median_sale_price', 'median_sale_price_mom', 'median_sale_price_yoy',
       'median_list_price', 'median_list_price_mom', 'median_list_price_yoy',
       'median_ppsf', 'median_ppsf_mom', 'median_ppsf_yoy', 'median_list_ppsf',
       'median_list_ppsf_mom', 'median_list_ppsf_yoy', 'homes_sold',
       'homes_sold_mom', 'homes_sold_yoy', 'pending_sales',
       'pending_sales_mom', 'pending_sales_yoy', 'new_listings',
       'new_listings_mom', 'new_listings_yoy', 'inventory', 'inventory_mom',
       'inventory_yoy', 'months_of_supply', 'months_of_supply_mom',
       'months_of_supply_yoy', 'median_dom', 'median_dom_mom',
       'median_dom_yoy', 'avg_sale_to_list', 'avg_sale_to_list_mom',
       'avg_sale_to_list_yoy', 'sold_above_list', 'sold_above_list_mom',
       'sold_above_list_yoy', 'price_drops', 'price_drops_mom',
       'price_drops_yoy', 'off_market_in_two_weeks',
       'off_market_in_two_weeks_mom', 'off_market_in_two_weeks_yoy',
       'parent_metro_region', 'parent_metro_region_metro_code',
       'last_updated'],
      dtype='object')
In [ ]:
plt.figure(figsize = (20,20))
sns.heatmap(oh_metrics.corr())
C:\Users\wilke\AppData\Local\Temp\ipykernel_42968\1925643702.py:2: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  sns.heatmap(oh_metrics.corr())
Out[ ]:
<Axes: >

metrics to keep based on relationships¶

  • median_list_price
  • median_sale_price
  • months_of_supply
  • months_of_supply_mom
  • months_of_supply_yoy
  • homes_sold
  • homes_sold_mom
  • homes_sold_yoy
  • median_list_ppsf
  • median_list_ppsf_mom
  • median_list_ppsf_yoy
  • inventory
  • inventory_mom
  • inventory_yoy
  • avg_sale_to_list
  • sold_above_list
  • sold_above_list_mom
  • sold_above_list_yoy
  • off_market_in_two_weeks
  • price_drops
  • pending_sales
  • pending_sale_mom
  • pending_sales_yoy
In [ ]:
metrics_df = oh_metrics[['period_begin',
            'property_type',
            'parent_metro_region',
            'median_sale_price',
            'median_sale_price_mom',
            'median_sale_price_yoy',
            'median_list_price',
            'median_list_price_yoy',
            'homes_sold',
            'homes_sold_mom',
            'homes_sold_yoy',
            'median_list_ppsf',
            'median_list_ppsf_mom',
            'median_list_ppsf_yoy',
            'inventory',
            'inventory_mom',
            'inventory_yoy',
            'homes_sold',
            'homes_sold_mom',
            'homes_sold_yoy',
            'avg_sale_to_list',
            'sold_above_list',
            'sold_above_list_mom',
            'sold_above_list_yoy',
            'off_market_in_two_weeks',
            'price_drops',
            'pending_sales',
            'pending_sales_mom',
            'pending_sales_yoy'
            ]]
In [ ]:
ind = 1
plt.figure(figsize=(40,40))
for i in metrics_df.columns:
    plt.subplot(len(metrics_df.columns),2,ind)
    plt.hist(metrics_df[i])
    plt.title(i)
    ind += 1
In [ ]:
pd.options.display.float_format = '{:.10f}'.format

metrics_df.describe()
Out[ ]:
median_sale_price median_sale_price_mom median_sale_price_yoy median_list_price median_list_price_yoy homes_sold homes_sold_mom homes_sold_yoy median_list_ppsf median_list_ppsf_mom ... homes_sold_yoy avg_sale_to_list sold_above_list sold_above_list_mom sold_above_list_yoy off_market_in_two_weeks price_drops pending_sales pending_sales_mom pending_sales_yoy
count 18561.0000000000 16939.0000000000 16795.0000000000 17344.0000000000 15853.0000000000 18561.0000000000 16939.0000000000 16795.0000000000 17261.0000000000 15932.0000000000 ... 16795.0000000000 18513.0000000000 18561.0000000000 16939.0000000000 16795.0000000000 16963.0000000000 7169.0000000000 16963.0000000000 15409.0000000000 15190.0000000000
mean 123316.5856365498 2.6377743553 0.8916561840 130962.6048489391 0.1396328997 174.1885674263 0.1326050315 0.2479267285 85.1312645735 0.0635229108 ... 0.2479267285 0.9505730064 0.1698789102 0.0013200587 0.0190135428 0.1666603519 0.1961144420 146.4138418912 0.1445931898 0.3313157016
std 55459.4255516965 320.8536340744 81.2828784318 55850.5597926305 0.9093015223 489.2566480623 0.8071128567 1.1948822229 34.8803575591 1.1781774974 ... 1.1948822229 0.0543684326 0.1863047256 0.1885512968 0.1962365221 0.2075858628 0.1235232663 415.3071894373 0.7492046057 1.3550770078
min 1.0000000000 -0.9999722222 -0.9999560440 1200.0000000000 -0.9499893594 1.0000000000 -0.9230769231 -0.9600000000 0.1944273436 -0.9895371339 ... -0.9600000000 0.5003335557 0.0000000000 -1.0000000000 -1.0000000000 0.0000000000 0.0000725268 1.0000000000 -0.9285714286 -0.9230769231
25% 88000.0000000000 -0.1115323163 -0.0582759969 94900.0000000000 -0.0563340006 4.0000000000 -0.1875000000 -0.1304347826 64.6925566343 -0.0730573835 ... -0.1304347826 0.9337706239 0.0000000000 -0.0430191470 -0.0283190404 0.0000000000 0.1282798834 4.0000000000 -0.2000000000 -0.1176470588
50% 120000.0000000000 0.0055865922 0.0714285714 124997.5000000000 0.0606428138 24.0000000000 0.0000000000 0.0209059233 83.2407407407 0.0046513262 ... 0.0209059233 0.9577922377 0.1290322581 0.0000000000 0.0006609385 0.0909090909 0.1787164907 18.0000000000 0.0000000000 0.0833333333
75% 152000.0000000000 0.1392399267 0.2307692308 158462.5000000000 0.1915384615 66.0000000000 0.2307692308 0.2692307692 104.1666666667 0.0946080366 ... 0.2692307692 0.9781873140 0.2444444444 0.0479819069 0.0727897036 0.2551667378 0.2352941176 52.0000000000 0.2727272727 0.4000000000
max 1486000.0000000000 41749.0000000000 10499.0000000000 1100000.0000000000 72.7083333333 3599.0000000000 30.0000000000 32.0000000000 982.7898550725 133.2155634735 ... 32.0000000000 1.4875884434 1.0000000000 1.0000000000 1.0000000000 1.0000000000 1.0000000000 3591.0000000000 17.3333333333 52.0000000000

8 rows × 26 columns

In [ ]:
metrics_df['parent_metro_region'].unique()
Out[ ]:
array(['Mansfield, OH', 'Springfield, OH', 'Dayton, OH', 'Tiffin, OH',
       'Marietta, OH', 'Toledo, OH', 'Ashtabula, OH', 'Celina, OH',
       'Norwalk, OH', 'Columbus, OH', 'Zanesville, OH', 'Lima, OH',
       'Washington Court House, OH', 'Chillicothe, OH', 'Findlay, OH',
       'Mount Vernon, OH', 'Sandusky, OH', 'Akron, OH', 'Urbana, OH',
       'Salem, OH', 'Marion, OH', 'Greenville, OH', 'Wilmington, OH',
       'Cambridge, OH', 'Ashland, OH', 'Canton, OH', 'Cincinnati, OH',
       'New Philadelphia, OH', 'Wooster, OH', 'Wapakoneta, OH',
       'Coshocton, OH', 'Bucyrus, OH', 'Sidney, OH', 'Defiance, OH',
       'Bellefontaine, OH', 'Fremont, OH', 'Athens, OH', 'Van Wert, OH',
       'Jackson, OH', 'Youngstown, OH', 'Portsmouth, OH', 'Cleveland, OH'],
      dtype=object)
In [ ]:
dayton_market = metrics_df[metrics_df['parent_metro_region'] == 'Dayton, OH']
columbus_market = metrics_df[metrics_df['parent_metro_region'] == 'Columbus, OH']
cincinnati_market = metrics_df[metrics_df['parent_metro_region'] == 'Cincinnati, OH']
cleveland_market = metrics_df[metrics_df['parent_metro_region'] == 'Cleveland, OH']
In [ ]:
plt.hist(dayton_market['property_type'])
Out[ ]:
(array([137.,   0., 137.,   0.,   0., 137.,   0., 128.,   0., 137.]),
 array([0. , 0.4, 0.8, 1.2, 1.6, 2. , 2.4, 2.8, 3.2, 3.6, 4. ]),
 <BarContainer object of 10 artists>)
In [ ]:
dayton_townhouse = dayton_market[dayton_market['property_type'] == 'Townhouse'].sort_values('period_begin')
dayton_family_home = dayton_market[dayton_market['property_type'] == 'Single Family Residential'].sort_values('period_begin')
dayton_condo = dayton_market[dayton_market['property_type'] == 'Condo/Co-op'].sort_values('period_begin')
dayton_multi_family = dayton_market[dayton_market['property_type'] == 'Multi-Family (2-4 Unit)'].sort_values('period_begin')
dayton_residential = dayton_market[dayton_market['property_type'] == 'All Residential'].sort_values('period_begin')
In [ ]:
metrics = [ 'median_sale_price',
            'inventory',
            'median_list_price',
            'homes_sold',
            'avg_sale_to_list',
            'sold_above_list',
            'homes_sold',
            'median_list_ppsf',
            'pending_sales',
            'off_market_in_two_weeks',
            'price_drops',
            ]



fig, ax = plt.subplots(11,1,sharex = True,figsize = (20,30))

fig.tight_layout(pad = 5)

ax = ax.flatten()

for ind,i in enumerate(metrics):
    ax[ind].plot(dayton_townhouse['period_begin'],dayton_townhouse[i])
    ax[ind].set_title(i)
    ax[ind].set_xticklabels(dayton_townhouse['period_begin'],rotation = 90)
C:\Users\wilke\AppData\Local\Temp\ipykernel_42968\3672131972.py:25: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax[ind].set_xticklabels(dayton_townhouse['period_begin'],rotation = 90)
In [ ]:
metrics = [ 'median_sale_price',
            'inventory',
            'median_list_price',
            'homes_sold',
            'avg_sale_to_list',
            'sold_above_list',
            'homes_sold',
            'median_list_ppsf',
            'pending_sales',
            'off_market_in_two_weeks',
            'price_drops',
            ]



fig, ax = plt.subplots(11,1,sharex = True,figsize = (20,30))

fig.tight_layout(pad = 5)

ax = ax.flatten()

for ind,i in enumerate(metrics):
    ax[ind].plot(dayton_family_home['period_begin'],dayton_family_home[i])
    ax[ind].set_title(i)
    ax[ind].set_xticklabels(dayton_family_home['period_begin'],rotation = 90)
C:\Users\wilke\AppData\Local\Temp\ipykernel_42968\2519425690.py:25: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax[ind].set_xticklabels(dayton_family_home['period_begin'],rotation = 90)
In [ ]:
metrics = [ 'median_sale_price',
            'inventory',
            'median_list_price',
            'homes_sold',
            'avg_sale_to_list',
            'sold_above_list',
            'homes_sold',
            'median_list_ppsf',
            'pending_sales',
            'off_market_in_two_weeks',
            'price_drops',
            ]



fig, ax = plt.subplots(11,1,sharex = True,figsize = (20,30))

fig.tight_layout(pad = 5)

ax = ax.flatten()

for ind,i in enumerate(metrics):
    ax[ind].plot(dayton_condo['period_begin'],dayton_condo[i])
    ax[ind].set_title(i)
    ax[ind].set_xticklabels(dayton_condo['period_begin'],rotation = 90)
C:\Users\wilke\AppData\Local\Temp\ipykernel_42968\1762790433.py:25: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax[ind].set_xticklabels(dayton_condo['period_begin'],rotation = 90)
In [ ]:
metrics = [ 'median_sale_price',
            'inventory',
            'median_list_price',
            'homes_sold',
            'avg_sale_to_list',
            'sold_above_list',
            'homes_sold',
            'median_list_ppsf',
            'pending_sales',
            'off_market_in_two_weeks',
            'price_drops',
            ]



fig, ax = plt.subplots(11,1,sharex = True,figsize = (20,30))

fig.tight_layout(pad = 5)

ax = ax.flatten()

for ind,i in enumerate(metrics):
    ax[ind].plot(dayton_multi_family['period_begin'],dayton_multi_family[i])
    ax[ind].set_title(i)
    ax[ind].set_xticklabels(dayton_multi_family['period_begin'],rotation = 90)
C:\Users\wilke\AppData\Local\Temp\ipykernel_42968\2919694956.py:25: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax[ind].set_xticklabels(dayton_multi_family['period_begin'],rotation = 90)
In [ ]:
metrics = [ 'median_sale_price',
            'inventory',
            'median_list_price',
            'homes_sold',
            'avg_sale_to_list',
            'sold_above_list',
            'homes_sold',
            'median_list_ppsf',
            'pending_sales',
            'off_market_in_two_weeks',
            'price_drops',
            ]



fig, ax = plt.subplots(11,1,sharex = True,figsize = (20,30))

fig.tight_layout(pad = 5)

ax = ax.flatten()

for ind,i in enumerate(metrics):
    ax[ind].plot(dayton_residential['period_begin'],dayton_residential[i])
    ax[ind].set_title(i)
    ax[ind].set_xticklabels(dayton_residential['period_begin'],rotation = 90)
C:\Users\wilke\AppData\Local\Temp\ipykernel_42968\3061486996.py:25: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax[ind].set_xticklabels(dayton_residential['period_begin'],rotation = 90)
In [ ]:
            'median_sale_price_mom',
            'median_sale_price_yoy',
            'median_list_price_yoy',
            'inventory_mom',
            'inventory_yoy',
            'homes_sold_mom',
            'homes_sold_yoy',
            'avg_sale_to_list',
            'sold_above_list',
            'sold_above_list_mom',
            'sold_above_list_yoy',
            'off_market_in_two_weeks',
            'price_drops',
            'pending_sales',
            'pending_sales_mom',
            'pending_sales_yoy'
            'median_list_ppsf_yoy'
            'median_list_ppsf_mom'
In [ ]:
metrics_mom = [ 'median_sale_price_mom',
            'inventory_mom',
            'homes_sold_mom',
            'sold_above_list_mom',
            'median_list_ppsf_mom',
            'pending_sales_mom'
            ]



fig, ax = plt.subplots(6,1,sharex = True,figsize = (20,30))

fig.tight_layout(pad = 5)

ax = ax.flatten()

for ind,i in enumerate(metrics_mom):
    ax[ind].plot(dayton_condo['period_begin'],dayton_condo[i])
    ax[ind].set_title(i)
    ax[ind].set_xticklabels(dayton_condo['period_begin'],rotation = 90)
C:\Users\wilke\AppData\Local\Temp\ipykernel_42968\1222752665.py:20: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax[ind].set_xticklabels(dayton_condo['period_begin'],rotation = 90)
In [ ]:
metrics_mom = [ 'median_sale_price_mom',
            'inventory_mom',
            'homes_sold_mom',
            'sold_above_list_mom',
            'median_list_ppsf_mom',
            'pending_sales_mom'
            ]



fig, ax = plt.subplots(6,1,sharex = True,figsize = (20,30))

fig.tight_layout(pad = 5)

ax = ax.flatten()

for ind,i in enumerate(metrics_mom):
    ax[ind].plot(dayton_condo['period_begin'],dayton_condo[i])
    ax[ind].set_title(i)
    ax[ind].set_xticklabels(dayton_condo['period_begin'],rotation = 90)
In [ ]:
dayton_townhouse.columns
Out[ ]:
Index(['period_begin', 'property_type', 'parent_metro_region',
       'median_sale_price', 'median_sale_price_mom', 'median_sale_price_yoy',
       'median_list_price', 'median_list_price_yoy', 'homes_sold',
       'homes_sold_mom', 'homes_sold_yoy', 'median_list_ppsf',
       'median_list_ppsf_mom', 'median_list_ppsf_yoy', 'inventory',
       'inventory_mom', 'inventory_yoy', 'homes_sold', 'homes_sold_mom',
       'homes_sold_yoy', 'avg_sale_to_list', 'sold_above_list',
       'sold_above_list_mom', 'sold_above_list_yoy', 'off_market_in_two_weeks',
       'price_drops', 'pending_sales', 'pending_sales_mom',
       'pending_sales_yoy'],
      dtype='object')
In [ ]: